home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Power Programmierung
/
Power-Programmierung CD 2 (Tewi)(1994).iso
/
doc
/
mir
/
p_marc.c
< prev
next >
Wrap
Text File
|
1992-05-25
|
12KB
|
422 lines
/*
* SEE MIR TUTORIAL ONE, TOPIC 9.5 ... THIS IS OLD CODE THAT
* HAS NOT BEEN UPGRADED FOR LACK OF SAMPLE DATA.
*
* P_MARC - preprocesses Marc records to standard format.
* See document "MARC_REC" dated June 13, 1986 by
* Doug Lowry for detail.
*
* written: dblowry 31 july 86
* modified: dblowry 04 march 87
* Copyright (c) 1987 Innotech Inc.
*
* usage: p_marc [2] filename(s)
* or p_marc [2] which takes input from stdin
* The option "2" is used to specify MARC II format,
* which is the same as MARC except that the block and
* record size values are omitted.
*
* input: mrc_fields containing client fld identifiers
* and Reteaco fld nums & inv controls
* stdin, or files
*
* output: filename.orig or stdout if using stdin for input
*
* note: this version makes use of the 4th column to specify how
* specific fields are to be inverted and displayed.
*/
#include <local.h>
#ifndef STDIO_INCLUDED
#include <stdio.h>
#endif
#ifndef CTYPE_INCLUDED
#include <ctype.h>
#endif
/*
 * declarations
 *
 * Every helper below is defined "static" further down in this file, so
 * the forward declarations carry "static" too.  Declaring a function
 * with external linkage and then defining it static gives the same
 * identifier two linkages, which modern compilers reject.
 * Parameter lists are left empty (old-style declarations) to match the
 * old-style definitions used throughout the file.
 */
static void load_inv_controls(), pre_process(), parse_directory();
static void proc_field();
static int get_data();
#define NUM_FLDS 999 /* max number of client fields; number of entries in the ctl[] table */
#define NUM_CTLS 5 /* a safe (?) upper limit on the 4-char control groups kept per field (rows of first_four) */
#define CLI_FLD_LEN_MAX 4 /* max length of client supplied fld id; client_fld_id holds this many chars plus a NUL */
#define RET_FLD_LEN 4 /* length of Reteaco fld num; load_inv_controls() steps through a line in groups of this size */
#define CTL_COL 3 /* Reteaco control column (not referenced by the code visible in this file) */
#define MAX_DAT_BYTES 9999 /* max bytes in record data; size of data_buf[] */
#define MAX_DIR_ENT 500 /* max entries in directory; size of direc[] */
/*
* global variables
*/
long file_location; /* cumulative bytes read, this file */
Bool marcII; /* MARC II format */
struct _mrc_direc {
int mrc_field_num;
int mrc_field_len;
int mrc_field_offset;
} direc[ MAX_DIR_ENT ];
int direc_entries;
struct _inv_ctl {
char client_fld_id[ CLI_FLD_LEN_MAX + 1 ];
char first_four[ NUM_CTLS ][ 4 ];
} ctl[ NUM_FLDS ];
int num_fields;
char data_buf[ MAX_DAT_BYTES ];
/*
 * MAIN - parse the optional "2" (MARC II) flag, load the inversion
 *        controls, then preprocess either stdin (to stdout) or each
 *        file named on the command line (to "<name>.orig").
 *
 *        A file that cannot be opened, whose output name would not
 *        fit in the name buffer, or whose output file cannot be
 *        created is reported with warning() and skipped; the
 *        remaining files are still processed.  Always exits with 0.
 */
int
main( argc, argv )
int argc;
char **argv;
{
    int file, first_file, num_files;
    char fname[ BUFSIZ ];
    FILE *fp_in, *fp_out;

    /* the flag is exactly the one-character argument "2" */
    marcII = FALSE;
    if( argc > 1 && argv[1][0] == '2' && argv[1][1] == '\0' )
        marcII = TRUE;

    num_files = argc - 1;
    if( marcII )
        num_files--;

    load_inv_controls();

    /* case of no files: filter stdin to stdout */
    if( !num_files )
    {
        file_location = 0;
        pre_process( stdout, stdin );
        exit( 0 );
    }

    /* alternately do each file on command line */
    first_file = marcII ? 2 : 1;
    for( file = first_file; file < argc; ++file )
    {
        /* room is needed for the name, ".orig" and the NUL */
        if( strlen( argv[ file ] ) + sizeof( ".orig" ) > sizeof( fname ) )
        {
            warning( "%s: file name \"%s\" is too long\n",
                argv[ 0 ], argv[ file ] );
            continue;
        }

        /* MARC input carries binary length words: open it untranslated */
        if(( fp_in = fopen( argv[ file ], "rb" )) == NULL )
        {
            warning( "%s: file \"%s\" does not exist\n",
                argv[ 0 ], argv[ file ] );
            continue;
        }

        sprintf( fname, "%s.orig", argv[ file ] );
        if(( fp_out = fopen( fname, "w" )) == NULL )
        {
            warning( "%s: cannot create output file \"%s\"\n",
                argv[ 0 ], fname );
            fclose( fp_in );
            continue;
        }

        file_location = 0;      /* tells get_data() a new file begins */
        pre_process( fp_out, fp_in );
        fclose( fp_in );
        fclose( fp_out );
    }
    exit( 0 );
}
/*
* LOAD_INV_CONTROLS - loads in the Reteaco field identifiers and
inversion
* control characters corresponding to client's
* field identifiers. The info is assumed to be in
* the file "mrc_fields".
*/
static void
load_inv_controls()
{
FILE *fp;
char buf[ BUFSIZ ];
int i, j, col, fld;
fp = e_fopen( "mrc_fields", "r" );
for( fld = 0; fgets( buf, BUFSIZ, fp ) > 0; ++fld )
{
/* client's fld id */
for( col= 0; isalnum( buf[ col ] ) && col < CLI_FLD_LEN_MAX
;
++col )
ctl[ fld ].client_fld_id[ col ] = buf[ col ];
ctl[ fld ].client_fld_id[ col ] = '\0';
for( j = col ; isspace( buf[j] ); ++j )
;
/* Reteaco's fld num & ctl */
for( i = 0; buf[j] != '\t'; j += RET_FLD_LEN, ++i )
{
if ( buf[ j+3 ] == '0' ) buf[ j+3 ] = ' ';
strncpy( ctl[fld].first_four[i], &buf[j], RET_FLD_LEN
);
ctl[fld].first_four[ i ][ RET_FLD_LEN ] = '\0';
}
if( i < NUM_CTLS ) ctl[fld].first_four[ i ][ 0 ] = '\0';
}
num_fields = fld;
fclose( fp );
return;
}
/*
 * PRE_PROCESS - Reads records from fp_in with get_data() until it
 *      reports EOF.  For every record a "000 " line is written to
 *      fp_out, followed by the output of proc_field() for each
 *      directory entry of that record, in directory order.
 */
static void
pre_process( fp_out, fp_in )
FILE *fp_out, *fp_in;
{
    int entry;

    for( ;; )
    {
        if( get_data( fp_in ) == EOF )
            break;

        /* record separator line */
        fputs( "000 \n", fp_out );

        entry = 0;
        while( entry < direc_entries )
        {
            proc_field( entry, fp_out );
            entry++;
        }
    }
    return;
}
/*
 * GET_DATA - Pulls in block header (if needed), then record header
 *      and contents.
 *
 *      Plain MARC: a block starts with a 2-byte block size and a
 *      2-byte zero word; every record is preceded by a 2-byte record
 *      size and a 2-byte zero word.  These words are read with
 *      fread() straight into shorts, i.e. in the byte order of the
 *      machine running the program.  MARC II input has neither.
 *
 *      Then comes the 24-byte leader (record length in columns 0-4,
 *      base address of data in columns 12-16), the directory
 *      (12 bytes per entry plus one terminator byte) and the data.
 *      The directory is parsed into direc[] / direc_entries and the
 *      data is left in data_buf.  file_location is advanced by the
 *      number of bytes consumed.
 *
 *      Returns 0 when a record was loaded.  Returns EOF at end of
 *      input (no block header, or the leader cannot be read in
 *      full).  Problems are reported through error(); when a size is
 *      unusable EOF is also returned in case error() comes back, so
 *      that data_buf can never be overrun.
 *      NOTE(review): error() is presumably fatal - confirm.
 */
#define BLK_SIZ 80
static int
get_data( fp )
FILE *fp;
{
    static short latest_block;  /* most recent block size read */
    static short block_bytes;   /* cumulative bytes this block */
    static short record_bytes;  /* total bytes in record */
    int rec_len;                /* record length, un-narrowed */
    int direc_bytes;            /* bytes in record directory */
    int data_bytes;             /* bytes in record data fields */
    char leader[24];
    int four0,                  /* 4 if MARC, 0 if MARC II */
        ch,                     /* fgetc() result, kept as int for EOF */
        i;
    short zilch;

    four0 = marcII ? 0 : 4;

    if( file_location == 0 )
    {
        record_bytes = 0;       /* initialize once per file */
        latest_block = 0;
        block_bytes = 0;
    }
    block_bytes += record_bytes;

    /* ...If block header due to be read, attempt to evaluate it */
    if( !marcII && block_bytes == latest_block )
    {
        if( fread( &latest_block, sizeof(short), 1, fp ) < 1 ||
            fread( &zilch, sizeof(short), 1, fp ) < 1 )
            return( EOF );
        if( zilch )
            error( "Block size out of sync at byte %D\n",
                file_location );
        block_bytes = 4;
        file_location += 4;
    }

    /* ...Read in the record size */
    if( !marcII )
    {
        if( fread( &record_bytes, sizeof(short), 1, fp ) < 1 ||
            fread( &zilch, sizeof(short), 1, fp ) < 1 )
        {
            error( "Failure reading record size starting at byte %D\n",
                file_location );
            return( EOF );
        }
        if( zilch )
            error( "Record size out of sync at byte %D\n",
                file_location );
        file_location += 4;
    }

    /* Get and analyze the 24 byte marc record leader */
    /* ...Activate the commented out items below when needed. */
    for( i = 0; i < 24; i++ )
    {
        /* test the int result: a data byte of 0377 is not end of file */
        ch = fgetc( fp );
        if( ch == EOF )
            return( EOF );
        leader[i] = (char) ch;
    }
    /* rec_status= leader[5]; */
    /* rec_type= leader[6]; */
    /* biblio_cat= leader[7]; */
    leader[5] = ' ';            /* stops atoi() after the 5 length digits */
    rec_len = atoi( leader );
    if( marcII )
        record_bytes = (short) rec_len;
    else
    {
        if( rec_len + four0 != record_bytes )
            error( "Record length inconsistent at byte %D\n",
                file_location );
        rec_len = record_bytes; /* the size word governs, as before */
    }
    /* indicator_count= leader[10] - '0'; */
    leader[17] = ' ';           /* stops atoi() after the base address */
    direc_bytes = atoi( &leader[12] ) - 24;
    file_location += 24;

    /* Now load the directory */
    if( direc_bytes < 0 )
    {
        error( "Negative directory size at byte %D\n", file_location );
        return( EOF );
    }
    if( direc_bytes > MAX_DIR_ENT * 12 ||
        direc_bytes > MAX_DAT_BYTES )
    {
        error( "Oversize directory at byte %D\n", file_location );
        return( EOF );
    }
    if( (int) fread( data_buf, sizeof(char), direc_bytes, fp ) <
        direc_bytes )
    {
        error( "Failure reading directory starting at byte %D\n",
            file_location );
        return( EOF );
    }
    file_location += direc_bytes;

    /* ... and place directory into field structure */
    if( direc_bytes % 12 != 1 )
        error( "Directory size not 12*N + 1 at byte %D\n",
            file_location );
    direc_entries = direc_bytes / 12;
    parse_directory();

    /* Now load the data stream (it replaces the directory in data_buf) */
    data_bytes = rec_len - four0 - 24 - direc_bytes;
    if( data_bytes < 0 )
    {
        error( "Negative record data size at byte %D\n", file_location );
        return( EOF );
    }
    if( data_bytes > MAX_DAT_BYTES )
    {
        error( "Oversize record data at byte %D\n",
            file_location );
        return( EOF );
    }
    if( (int) fread( data_buf, sizeof(char), data_bytes, fp ) <
        data_bytes )
    {
        error( "Failure reading record data starting at byte %D\n",
            file_location );
        return( EOF );
    }
    file_location += data_bytes;
    return( 0 );
}
/*
* PARSE_DIRECTORY - Place ASCII of record directory into
structure
*/
static void
parse_directory()
{
int entry,i;
int off; /* offset within directory */
char num[4],len[5],offset[6];
off= 0;
num[3] = len[4] = offset[5] = ' ';
for( entry= 0; entry < direc_entries ; ++entry )
{
for( i= 0; i < 3 ; ++i )
num[ i ] = data_buf[ off++ ];
direc[entry].mrc_field_num = atoi( num );
for( i= 0; i < 4 ; ++i )
len[ i ] = data_buf[ off++ ];
direc[entry].mrc_field_len = atoi( len );
for( i= 0; i < 5 ; ++i )
offset[ i ] = data_buf[ off++ ];
direc[entry].mrc_field_offset = atoi( offset );
}
return;
}
/*
* PROC_FIELD - Preprocess a single field
*/
static void
proc_field( field, fpo )
int field;
FILE *fpo;
{
int low,high; /* limits within data stream */
int ctli, col, num, i;
char c, nxt, four[5];
low= direc[ field ].mrc_field_offset;
high= low + direc[ field ].mrc_field_len - 1;
num= direc[ field ].mrc_field_num - 1;
four[4] = '\0';
for ( ctli= 0; ctli < NUM_CTLS ; ++ctli )
{
if ( !ctl[ field ].first_four[ ctli ][ 0 ] ) break;
strncpy( four, ctl[num].first_four[ctli], 4 );
if ( four[3] == 'w' ) break; /* w= no invert, no display
*/
fputs( four, fpo );
col= 4;
for ( i= low; i <= high; ++i)
{
if (( c= data_buf[i] ) == 037 ) /* begin sub-field */
{
i++; /* swallow next char */
if ( col <= 4 )
continue;
fputc( '\n', fpo );
col= 0;
continue;
}
/* ...If at end of sub-field or line getting too long, set for new
line */
if ( c == 036 || ( col > 70 && isspace( c )) )
{
fputc( '\n', fpo );
col= 0;
continue;
}
/* ...Swallow other non-printing characters, beginning semi-colon
*/
if ( !isprint( c ))
continue;
if ( col == 4 && ( c == ';' || c == ' ' ))
continue;
/* ...Prior to writing, set up lead 4 columns if needed */
if ( col < 4 )
{
fputs( " ", fpo );
fputc( four[3], fpo );
col= 4;
}
if( c == ' ' )
{
nxt = data_buf[i+1];
if( nxt == ' ' || !isprint( nxt ) )
continue;
}
fputc( c, fpo );
col++;
}
}
if( c == 035 )
fputc( '\n', fpo );
return;
}